    .text                               # Code lives in the text section
    .balign 4                           # 4-byte entry alignment (required when the C extension is absent)
    .global solve                       # Export the entry point
    .type   solve, @function            # Typed symbol for linkers, debuggers and profilers

#-----------------------------------------------------------------------
# int solve(const char *src1, const char *src2)
#
# strcmp-style comparison of two NUL-terminated byte strings using the
# RISC-V vector extension (byte elements, LMUL=2).
#
# ABI:   standard RISC-V integer calling convention; leaf, no stack use
# In:    a0 = src1, a1 = src2
# Out:   a0 = src1[i] - src2[i] (bytes taken as unsigned) at the first
#             index i where the bytes differ or src1[i] == 0;
#             0 when the strings are equal
# Clobb: a1-a4, t0, t1, v0, v1, v8-v9, v16-v17, vl, vtype
#-----------------------------------------------------------------------
solve:
strcmp:                                 # File-local alias of solve (not exported)
    ## Using LMUL=2, but same register names work for larger LMULs
    li      t1, 0                       # No pointer bump on the first pass

.Lsolve_loop:
    # vl is reset to VLMAX on every pass because the fault-only-first
    # loads below may shrink it.
    vsetvli t0, x0, e8, m2, ta, ma      # Max-length vectors of bytes
    add     a0, a0, t1                  # Skip the src1 bytes already compared
    vle8ff.v v8, (a0)                   # Load src1 bytes; may trim vl instead of faulting
    add     a1, a1, t1                  # Skip the src2 bytes already compared
    vle8ff.v v16, (a1)                  # Load src2 bytes under the (possibly trimmed) vl

    vmseq.vi v0, v8, 0                  # Mask: src1 byte is the NUL terminator
    vmsne.vv v1, v8, v16                # Mask: src1 byte != src2 byte
    vmor.mm v0, v0, v1                  # Either condition ends the scan

    vfirst.m a2, v0                     # a2 = index of first stop element, or -1 if none
    csrr    t1, vl                      # t1 = bytes actually compared this pass

    bltz    a2, .Lsolve_loop            # No stop condition: advance and continue

    # a2 indexes the deciding byte inside the current chunk. Re-read both
    # bytes from memory with zero extension so the result is an unsigned
    # byte difference.
    add     a0, a0, a2                  # Address of the deciding src1 byte
    lbu     a3, (a0)                    # a3 = src1 byte
    add     a1, a1, a2                  # Address of the deciding src2 byte
    lbu     a4, (a1)                    # a4 = src2 byte
    sub     a0, a3, a4                  # Return value: <0, 0 or >0

    ret
    .size   solve, .-solve              # Record the function's extent in the symbol table

